from onnxruntime.quantization import quantize_dynamic, QuantType
import onnx

# Path to the input ONNX model and output path for the quantized model.
# (Fixed: the input path previously contained a stray backtick — "your_model.onnx`" —
# which would cause a file-not-found error at load time.)
onnx_model_path = "your_model.onnx"
quantized_model_path = "your_model_quant.onnx"

# Apply dynamic (post-training) quantization: weights are converted to INT8
# on disk, while activations are quantized on the fly at inference time.
# No calibration dataset is required for this mode.
quantize_dynamic(
    onnx_model_path,
    quantized_model_path,
    weight_type=QuantType.QInt8,
)